{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pick up edits to imported project modules without restarting the kernel\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/siddhesh1793/anaconda3/envs/clip_classifier/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "from model import MLP\n",
    "from dataset_ffcv import get_dataloaders"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Locate the checkpoint and the wandb config written under the run's output dir\n",
    "model_num = 68\n",
    "output_dir = \"outputs/2022-11-09/17-24-47\"\n",
    "model_ckpt_filepath = os.path.join(output_dir, f\"model_{model_num}.ckpt\")\n",
    "wandb_dir = os.path.join(output_dir, \"wandb\")\n",
    "\n",
    "# os.listdir() returns entries in arbitrary order, so sort them to make the\n",
    "# choice reproducible when several wandb run folders exist.\n",
    "run_folders = sorted(name for name in os.listdir(wandb_dir) if \"run-\" in name)\n",
    "if not run_folders:\n",
    "\t# Fail with a clear message instead of passing None to os.path.join below\n",
    "\traise FileNotFoundError(f\"No wandb 'run-*' folder found in {wandb_dir}\")\n",
    "\n",
    "config_filepath = os.path.join(wandb_dir, run_folders[0], \"files\", \"config.yaml\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import yaml\n",
    "\n",
    "# Read the saved run config with an explicit encoding rather than the\n",
    "# platform default, so the file parses the same way on every machine.\n",
    "with open(config_filepath, \"r\", encoding=\"utf-8\") as f:\n",
    "\tconfig = yaml.safe_load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MLP(\n",
       "  (layers): ModuleList(\n",
       "    (0): Linear(in_features=1024, out_features=512, bias=True)\n",
       "    (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "    (2): Linear(in_features=512, out_features=512, bias=True)\n",
       "    (3): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "    (4): ReLU()\n",
       "    (5): Linear(in_features=512, out_features=1000, bias=False)\n",
       "  )\n",
       ")"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "# Build the network from the hyper-parameters stored in the wandb config\n",
    "model = MLP(**config['model']['value']).to('cuda')\n",
    "\n",
    "# Read the checkpoint from disk once. map_location='cpu' keeps the saved tensors\n",
    "# off the GPU; load_state_dict() copies them into the CUDA parameters.\n",
    "# NOTE: torch.load unpickles the file, so only open checkpoints you trust.\n",
    "model_dict = torch.load(model_ckpt_filepath, map_location='cpu')\n",
    "model.load_state_dict(model_dict['model_state_dict'])\n",
    "\n",
    "# eval() makes the BatchNorm layers use their running statistics for inference\n",
    "model.eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the ffcv dataloaders from the saved run config and keep the third\n",
    "# returned loader (test_dl). To change loader settings such as num_workers,\n",
    "# edit config['dataloader']['value'] before this call.\n",
    "dataset_kwargs = config['dataset']['value']\n",
    "dataloader_kwargs = config['dataloader']['value']\n",
    "_, _, test_dl = get_dataloaders(**dataset_kwargs, **dataloader_kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def process_batch(model, data, f, k=5, num_classes=1000):\n",
    "\t\"\"\"Write the top-k predicted class ids of one batch to a submission file.\n",
    "\n",
    "\tArgs:\n",
    "\t\tmodel: Callable that maps the batch inputs to per-class scores with\n",
    "\t\t\tone row per sample.\n",
    "\t\tdata: One batch from the dataloader; element 2 is what is fed to the model.\n",
    "\t\tf: Writable text file object; one line is written per sample.\n",
    "\t\tk: Number of predictions written per sample.\n",
    "\t\tnum_classes: Largest valid 1-indexed class id, used by the sanity check.\n",
    "\t\"\"\"\n",
    "\t# Inference only: skip autograd bookkeeping to save memory and time\n",
    "\twith torch.no_grad():\n",
    "\t\toutput = model(data[2])\n",
    "\n",
    "\t# softmax is monotonic, so top-k on the raw scores yields the same ranking\n",
    "\t_, topk_idxs = torch.topk(output, k=k, dim=-1)\n",
    "\n",
    "\t# Submission ids are 1-indexed (1..num_classes). Convert the whole batch to\n",
    "\t# Python lists with a single device-to-host copy instead of two per row.\n",
    "\trows = (topk_idxs + 1).cpu().tolist()\n",
    "\n",
    "\tfor row in rows:\n",
    "\t\t# Sanity check: show any row that is malformed or out of range\n",
    "\t\tif len(row) != k or max(row) > num_classes or min(row) < 1:\n",
    "\t\t\tprint(row)\n",
    "\n",
    "\t\t# Ids are space separated; the two spaces before the newline keep the\n",
    "\t\t# line format byte-identical to previously generated files.\n",
    "\t\tf.write(' '.join(str(class_idx) for class_idx in row) + \"  \\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Open in write mode so that re-running this cell replaces the submission\n",
    "# file instead of appending a second copy of every prediction to it.\n",
    "with open('predictions.txt', 'w') as f:\n",
    "\tfor data in test_dl:\n",
    "\t\tprocess_batch(model, data, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100000\n"
     ]
    }
   ],
   "source": [
    "# Sanity check: count the lines written to the submission file\n",
    "with open('predictions.txt', 'r') as predictions_file:\n",
    "\tline_count = sum(1 for _ in predictions_file)\n",
    "print(line_count)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.9.13 ('clip_classifier')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "662a676bd14e63d833415312f5b884df48fa63ef3f197b360d4f306d87ea26d9"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
